Set working directory
## [1] "/Users/gabays/github/RiseAndFall"
Load packages
# Attach every package used by the analysis, installing any package that is
# missing from the local library first.
for (pkg in c("ggplot2", "roll", "purrr", "stylo")) {
  # requireNamespace() only tests whether the package is available; the
  # attaching is left to library(), which stops with an error on failure
  # instead of returning FALSE the way require() does.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}
Load external functions
Load previously computed data
Get Metadata and corpus as 3-grams
# Metadata: one row per play, named after the first CSV column, then put in
# alphabetical order of the row names.
metadata <- read.csv(file = "./metadata.csv", sep = ",", header = TRUE, row.names = 1, stringsAsFactors = TRUE)
metadata <- metadata[sort(rownames(metadata)), ]
# Feature tables (n3 file: 3-grams; n1 file: the table used as stop words below).
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
data <- read.csv(file = "./feats_tests_n3_k_5000.csv", sep = ",", header = TRUE, row.names = 1, stringsAsFactors = TRUE)
data_stop <- read.csv(file = "./feats_tests_n1_k_5000.csv", sep = ",", header = TRUE, row.names = 1, stringsAsFactors = TRUE)
We compute the distribution: which 3grams are relevant?
We work with the 2000 most frequent 3grams – after that the curve is totally flat
We normalise the vectors
# 3-gram table: transpose it, then normalise it
d <- normalisations(t(d))
# stop-word table: same two steps
d_stop <- normalisations(t(d_stop))
We add the metadata
# Put each metadata row name next to the feature-table column name found at
# the same position (first row: metadata, second row: feature table)
control <- t(cbind(rownames(metadata), colnames(d)))
# Print the first pairs to check by eye that both tables list the same plays
head(t(control))
## [,1]
## [1,] "abeille-argelie"
## [2,] "abeille-coriolan"
## [3,] "abeille-lyncee"
## [4,] "about-risette"
## [5,] "adenis-homme-qui-ne-peut-pas-siffler"
## [6,] "aigueberre-avare-amoureux"
## [,2]
## [1,] "abeille-argelie.txt"
## [2,] "abeille-coriolan.txt"
## [3,] "abeille-lyncee.txt"
## [4,] "about-risette.txt"
## [5,] "adenis-homme-qui-ne-peut-pas-siffler.txt"
## [6,] "aigueberre-avare-amoureux.txt"
## [,1]
## [1511,] "voltaire-tanis-zelide"
## [1512,] "voltaire-zaire"
## [1513,] "voltaire-zulime"
## [1514,] "vondrebeck-alard-forces-de-l-amour"
## [1515,] "zola-madeleine"
## [1516,] "zola-therese-raquin"
## [,2]
## [1511,] "voltaire-tanis-zelide.txt"
## [1512,] "voltaire-zaire.txt"
## [1513,] "voltaire-zulime.txt"
## [1514,] "vondrebeck-alard-forces-de-l-amour.txt"
## [1515,] "zola-madeleine.txt"
## [1516,] "zola-therese-raquin.txt"
Alternative: loading plays in plain text (for later)
# Load every plain-text play found under txt/ into `corpus`, one list element
# per file, in the order of `TxtFiles`.
# The dot is escaped so that only names really ending in ".txt" are kept.
TxtFiles <- list.files(path = "txt", pattern = "\\.txt$")
corpus <- lapply(TxtFiles, function(x) {
  # Get the path
  FullPath <- file.path("txt", x)
  # Get the text in the file. The lines are pasted into a single string so
  # that each file contributes exactly one entry, which is what the later
  # row naming with TxtFiles requires.
  paste(suppressWarnings(readLines(FullPath)), collapse = " ")
})
View(corpus)
We transform the loaded texts into minable data
# Tokenise every text of the corpus
corpus.tok <- lapply(corpus, txt.to.words2)
# Count the frequency of the tokens
corpus.tok.list <- make.frequency.list(corpus.tok)
# Turn the frequencies into a table of raw (not relative) counts.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
corpus.tok.list.freq <- make.table.of.frequencies(corpus.tok, corpus.tok.list, relative = FALSE)
## processing 1516 text samples
## .......................................................................................................................................................
## combining frequencies into a table...
# Name each row of the frequency table after the file it was computed from
row.names(corpus.tok.list.freq) <- TxtFiles
# Keep a copy on disk
write.csv(corpus.tok.list.freq, file = "corpus.bench.tok.list.freq.csv", row.names = TRUE)
# Read the copy back; read.csv() already returns a data frame
corpus.tok.list.freq <- read.csv(file = "corpus.bench.tok.list.freq.csv", sep = ",", header = TRUE, row.names = 1, quote = '\"')
# Transpose (rows become columns), then normalise
corpus.tok.list.freq <- normalisations(t(corpus.tok.list.freq))
# Display the result
View(corpus.tok.list.freq)
We control that stopwords do identify genres
# Distance of every play to the mean tragedy profile and to the mean comedy
# profile, computed on the stop-word table.
distToTragedy <- DistToCentroid(d_stop, centroid = rowMeans(d_stop[, metadata[, "Genre"] == "tragedy"]))
distToComedy <- DistToCentroid(d_stop, centroid = rowMeans(d_stop[, metadata[, "Genre"] == "comedy"]))
m <- cbind(distToTragedy, distToComedy)
colnames(m) <- c("DistTragedyCentroid", "DistComedyCentroid")
# create transparent color (the plots below do not use it: they rely on the
# built-in colour name "transparent")
mycol <- t_col("white", perc = 100, name = "transparent")
# One colour per play, chosen from the genre name itself so that the mapping
# does not depend on how many genres exist or on their order.
colors <- as.character(metadata[, "Genre"])
colors <- ifelse(colors == "comedy", "blue", ifelse(colors == "tragedy", "red", "transparent"))
# Draw the scatter plot on whichever graphics device is currently active
drawGenreClusters <- function() {
  plot(m, col = colors)
  # A keyword position keeps the legend inside the plotting region whatever
  # the range of the distances is.
  legend("topleft", c("comedy", "tragedy"), col = c("blue", "red"), pch = 1)
  grid(nx = NULL, ny = NULL,
       lty = 1,      # Grid line type
       col = "gray", # Grid line color
       lwd = 1)      # Grid line width
}
# plot
drawGenreClusters()
# text(m[, 1], m[, 2], labels=row.names(m), cex= 0.1) # To add the name of the plays
# Save the image
png("./R/images/clusters_stop.png", width = 2500, height = 2000, res = 300)
drawGenreClusters()
dev.off()
## quartz_off_screen
## 2
We control tragedies classified with comedies:
# Tragedies lying far from the tragedy centroid (stop-word distances).
literaryGenre <- metadata[, "Genre"]
# cbind() stores the genre factor as its integer level code
check_anomalies <- as.data.frame(cbind(m, literaryGenre))
# Look the code of "tragedy" up by name instead of hard-coding its position
tragedyCode <- match("tragedy", levels(literaryGenre))
subset(check_anomalies, literaryGenre == tragedyCode & DistTragedyCentroid > 10)
## DistTragedyCentroid DistComedyCentroid
## aubignac-pucelle-prose.txt 10.13830 9.848412
## delavigne-famille-temps-luther.txt 10.05154 10.756579
## mathieu-magicienne-etrangere.txt 10.57459 11.594939
## piron-nouvelle-messaline.txt 10.26150 10.815560
## puget-de-la-serre-pandoste-ii.txt 10.58983 10.440721
## puget-de-la-serre-thesee.txt 10.67992 10.032731
## puget-de-la-serre-thomas-morus.txt 10.51248 10.700152
## viau-pyrame.txt 10.18889 11.218962
## literaryGenre
## aubignac-pucelle-prose.txt 10
## delavigne-famille-temps-luther.txt 10
## mathieu-magicienne-etrangere.txt 10
## piron-nouvelle-messaline.txt 10
## puget-de-la-serre-pandoste-ii.txt 10
## puget-de-la-serre-thesee.txt 10
## puget-de-la-serre-thomas-morus.txt 10
## viau-pyrame.txt 10
We control comedies classified with tragedies:
# Comedies lying close to the tragedy centroid (stop-word distances).
literaryGenre <- metadata[, "Genre"]
# cbind() stores the genre factor as its integer level code
check_anomalies <- as.data.frame(cbind(m, literaryGenre))
# Look the code of "comedy" up by name instead of hard-coding its position
comedyCode <- match("comedy", levels(literaryGenre))
subset(check_anomalies, literaryGenre == comedyCode & DistTragedyCentroid < 9)
## DistTragedyCentroid DistComedyCentroid
## archambault-etrennes.txt 8.422193 8.746238
## chapuzeau-geneve-delivree.txt 8.680547 11.143438
## cinq-auteurs-comedie-des-tuileries.txt 8.785608 11.072267
## colle-alfonse.txt 8.706562 11.740707
## corneillep-illusion-comique.txt 8.577145 11.471762
## corneillep-melite.txt 8.889448 10.986803
## corneillet-geolier-de-sois-meme.txt 7.832000 10.080357
## corneillet-illustres-ennemis.txt 8.475145 10.324881
## cubieres-palmezeaux-lacrymanie.txt 8.509334 11.291061
## labaume-messe-de-gnide.txt 8.163510 10.605135
## moline-legislatrices.txt 8.173196 7.149285
## rotrou-bague-de-l-oubli.txt 8.675853 10.997791
## rotrou-belle-alphrede.txt 8.238488 11.563174
## rotrou-sosies.txt 8.589527 10.456716
## saint-roman-dialogue.txt 8.869912 11.717547
## literaryGenre
## archambault-etrennes.txt 2
## chapuzeau-geneve-delivree.txt 2
## cinq-auteurs-comedie-des-tuileries.txt 2
## colle-alfonse.txt 2
## corneillep-illusion-comique.txt 2
## corneillep-melite.txt 2
## corneillet-geolier-de-sois-meme.txt 2
## corneillet-illustres-ennemis.txt 2
## cubieres-palmezeaux-lacrymanie.txt 2
## labaume-messe-de-gnide.txt 2
## moline-legislatrices.txt 2
## rotrou-bague-de-l-oubli.txt 2
## rotrou-belle-alphrede.txt 2
## rotrou-sosies.txt 2
## saint-roman-dialogue.txt 2
We control that 3-grams do identify genres
# Distance of every play to the mean tragedy profile and to the mean comedy
# profile, computed on the 3-gram table.
distToTragedy <- DistToCentroid(d, centroid = rowMeans(d[, metadata[, "Genre"] == "tragedy"]))
distToComedy <- DistToCentroid(d, centroid = rowMeans(d[, metadata[, "Genre"] == "comedy"]))
m <- cbind(distToTragedy, distToComedy)
colnames(m) <- c("DistTragedyCentroid", "DistComedyCentroid")
# create transparent color (the plots below do not use it: they rely on the
# built-in colour name "transparent")
mycol <- t_col("white", perc = 100, name = "transparent")
# One colour per play, chosen from the genre name itself so that the mapping
# does not depend on how many genres exist or on their order.
colors <- as.character(metadata[, "Genre"])
colors <- ifelse(colors == "comedy", "blue", ifelse(colors == "tragedy", "red", "transparent"))
# Draw the scatter plot on whichever graphics device is currently active
drawGenreClusters <- function() {
  plot(m, col = colors)
  # A keyword position keeps the legend inside the plotting region whatever
  # the range of the distances is.
  legend("topleft", c("comedy", "tragedy"), col = c("blue", "red"), pch = 1)
  grid(nx = NULL, ny = NULL,
       lty = 1,      # Grid line type
       col = "gray", # Grid line color
       lwd = 1)      # Grid line width
}
# plot
drawGenreClusters()
# text(m[, 1], m[, 2], labels=row.names(m), cex= 0.1) # To add the name of the plays
# Save the image
png("./R/images/clusters_3grams.png", width = 2500, height = 2000, res = 300)
drawGenreClusters()
dev.off()
## quartz_off_screen
## 2
Results are more precise than with stopwords.
We control tragedies classified with comedies:
# Tragedies lying far from the tragedy centroid (3-gram distances).
literaryGenre <- metadata[, "Genre"]
# cbind() stores the genre factor as its integer level code
check_anomalies <- as.data.frame(cbind(m, literaryGenre))
# Look the code of "tragedy" up by name instead of hard-coding its position
tragedyCode <- match("tragedy", levels(literaryGenre))
subset(check_anomalies, literaryGenre == tragedyCode & DistTragedyCentroid > 34)
## DistTragedyCentroid DistComedyCentroid
## champrepus-ulysse.txt 35.01097 36.45605
## delavigne-famille-temps-luther.txt 34.70682 35.44638
## deshoulieres-mort-de-cochon.txt 34.30966 31.55398
## puget-de-la-serre-pandoste-ii.txt 35.03667 35.12204
## soret-ceciliade.txt 34.31754 34.05263
## literaryGenre
## champrepus-ulysse.txt 10
## delavigne-famille-temps-luther.txt 10
## deshoulieres-mort-de-cochon.txt 10
## puget-de-la-serre-pandoste-ii.txt 10
## soret-ceciliade.txt 10
We control comedies classified with tragedies:
# Comedies lying close to the tragedy centroid (3-gram distances).
literaryGenre <- metadata[, "Genre"]
# cbind() stores the genre factor as its integer level code
check_anomalies <- as.data.frame(cbind(m, literaryGenre))
# Look the code of "comedy" up by name instead of hard-coding its position
comedyCode <- match("comedy", levels(literaryGenre))
subset(check_anomalies, literaryGenre == comedyCode & DistTragedyCentroid < 29)
## DistTragedyCentroid DistComedyCentroid
## diderot-fils-naturel.txt 28.91923 26.40822
## doruxigne-alzate.txt 28.82176 34.88925
## genlis-belle-et-la-bete.txt 27.18629 20.95635
## literaryGenre
## diderot-fils-naturel.txt 2
## doruxigne-alzate.txt 2
## genlis-belle-et-la-bete.txt 2
## [1] "" "comedy" "dialogue" "drama" "farce"
## [6] "monologue" "opera" "proverbe" "saynete" "tragedy"
## [11] "tragicomedy" "vaudeville"
# Get genre of all plays
clusters <- metadata[, "Genre"]
# Relabel the genres 1..k, k being the number of genres actually present
# (a fixed 1:13 would add a level that no play belongs to)
levels(clusters) <- seq_len(nlevels(clusters))
# Compute inertia, one value per genre
clusterInertia(t(d), as.numeric(clusters))
## [1] 198.08189 625.92647 23.60303 29.55105 10.32456 56.62311 11.61778
## [8] 112.83419 21.57262 212.44350 61.53742 5.48289
Comedy (with 3-grams)
# Columns of the 3-gram table that belong to comedies
isComedy <- metadata[, "Genre"] == "comedy"
comedies <- d[, isComedy]
# Manhattan distance of each comedy to the centroid (no explicit centroid is
# passed, so DistToCentroid presumably derives it from the comedies themselves)
comediesToCentroid <- DistToCentroid(comedies, method = "manhattan")
summary(comediesToCentroid)
## DistToCentroid
## Min. :20.96
## 1st Qu.:30.10
## Median :31.50
## Mean :31.35
## 3rd Qu.:32.79
## Max. :37.47
## genlis-belle-et-la-bete.txt lesage-dorneval-ile-gougou.txt
## 20.95635 21.58979
## liborliere-cloison.txt dancourt-mari-retrouve.txt
## 25.91816 26.15407
## palissot-barbier-de-bagdad.txt dancourt-impromptu-de-garnison.txt
## 26.17055 26.28167
## colle-alfonse.txt corneillep-illusion-comique.txt
## 36.08137 36.64387
## chapuzeau-geneve-delivree.txt quinault-comedie-sans-comedie.txt
## 36.64600 36.71626
## rotrou-belle-alphrede.txt colle-cocatrix.txt
## 36.87928 37.47040
Tragedies (with 3-grams)
# Columns of the 3-gram table that belong to tragedies
isTragedy <- metadata[, "Genre"] == "tragedy"
tragedies <- d[, isTragedy]
# Manhattan distance of each tragedy to the centroid (no explicit centroid is
# passed, so DistToCentroid presumably derives it from the tragedies themselves)
tragediesToCentroid <- DistToCentroid(tragedies, method = "manhattan")
summary(tragediesToCentroid)
## DistToCentroid
## Min. :18.76
## 1st Qu.:24.87
## Median :26.27
## Mean :26.82
## 3rd Qu.:28.29
## Max. :35.04
## la-thuilerie-soliman.txt barbier-mort-de-cesar.txt saurin-spartacus.txt
## 18.76257 20.31262 21.40985
## genest-zelonide.txt pellegrin-tibere.txt voltaire-mariamne.txt
## 21.86938 22.19090 22.40103
## puget-de-la-serre-thesee.txt deshoulieres-mort-de-cochon.txt
## 33.84736 34.30966
## soret-ceciliade.txt delavigne-famille-temps-luther.txt
## 34.31754 34.70682
## champrepus-ulysse.txt puget-de-la-serre-pandoste-ii.txt
## 35.01097 35.03667
# Evolution over time of the distance to the genre centroid.
#
# For one feature table and one genre: keep the plays dated strictly between
# 1500 and 1800, compute their Manhattan distance to the centroid, plot the
# mean distance per date together with a loess trend, and save the figure.
#
# features: feature table with one column per play, in the row order of `metadata`
# genre:    value of metadata$Genre to keep ("tragedy" or "comedy")
# file:     path of the PNG file to write
# Returns the ggplot object invisibly.
plotCentroidEvolution <- function(features, genre, file) {
  keep <- metadata[, "Genre"] == genre & metadata[, "Date"] < 1800 & metadata[, "Date"] > 1500
  toCentroid <- DistToCentroid(features[, keep], method = "manhattan")
  evoCentroid <- cbind(toCentroid, metadata[keep, ])
  vizEvoCentroid <- ggplot(data = evoCentroid, mapping = aes(x = Date, y = DistToCentroid)) +
    geom_point(stat = "summary", fun = "mean") +
    # `linewidth` replaces `size`, deprecated for lines since ggplot2 3.4.0
    geom_smooth(method = loess, linewidth = 1) +
    theme_bw()
  # ggsave() reports the smoothing formula: `geom_smooth()` using formula = 'y ~ x'
  ggsave(file, plot = vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
  invisible(vizEvoCentroid)
}

# Tragedies: 3-grams, word frequencies, stop words.
# The stop-word figure gets its own file; it used to be written to
# tragedy3.png and so replaced the 3-gram figure.
plotCentroidEvolution(d, "tragedy", "./R/images/tragedy3.png")
plotCentroidEvolution(corpus.tok.list.freq, "tragedy", "./R/images/tragedyW.png")
plotCentroidEvolution(d_stop, "tragedy", "./R/images/tragedy_stop.png")

# Comedies: same three feature tables, same remark for the stop-word figure
plotCentroidEvolution(d, "comedy", "./R/images/comedy3.png")
plotCentroidEvolution(corpus.tok.list.freq, "comedy", "./R/images/comedyW.png")
vizEvoCentroid <- plotCentroidEvolution(d_stop, "comedy", "./R/images/comedy_stop.png")
We select all the authors with at least 3 plays in the dataset
# all authors, with the number of plays each of them has in the metadata
n_occur <- data.frame(table(metadata$Author))
# Remove the entry for plays without an author (empty value), selected by its
# value so that no real author is lost if that entry is absent or not first
n_occur <- n_occur[n_occur$Var1 != "", ]
# keep the authors with more than 2 plays
multiples <- n_occur[n_occur$Freq > 2, ]
# transform df into vector
multiples <- multiples$Var1
# number of authors
length(multiples)
## [1] 113
We select all the authors who have at least 3 comedies
# Among the authors kept above, retain those with more than two comedies
authorsSelected <- c()
for (candidate in multiples) {
  comediesByAuthor <- metadata[metadata$Author == candidate & metadata$Genre == "comedy", ]
  if (nrow(comediesByAuthor) > 2) {
    authorsSelected <- c(authorsSelected, candidate)
  }
}
We compute the distance to the centroid of the author, the centroid of the genre and the distance between both:
# For every comedy dated strictly between 1500 and 1800 whose author was
# selected above, record the distance of the play to the centroid of its
# author's comedies, its distance to the centroid of the comedies dated within
# 30 years of it, and the gap between the two.
# create an empty df to store results (8 values are stored per play)
df_comedy <- data.frame(matrix(ncol = 8, nrow = 0))
# get name of plays
plays <- rownames(metadata)
# prepare to iterate
incr <- 0
# loop over plays
for (x in plays) {
  # increment
  incr <- incr + 1
  # author, genre and date are read from metadata columns 2, 4 and 3
  author <- metadata[incr, 2]
  genre <- metadata[incr, 4]
  date <- metadata[incr, 3]
  # if author has written multiple texts present in the corpus and genre is known
  # (&& is the scalar operator intended for if() conditions)
  if (author %in% authorsSelected && genre == "comedy" && date > 1500 && date < 1800) {
    # get the data of all the plays of an author
    authorData <- d[, metadata[, "Author"] == author & metadata[, "Genre"] == genre]
    # authorData = d[, metadata[, "Author"] == author]
    # get all the plays of the same genre dated within 30 years of the play
    # genreData = d[, metadata[, "Genre"] == genre & metadata[, "Date"]]
    genreData <- d[, metadata[, "Genre"] == genre & metadata[, "Date"] < date + 30 & metadata[, "Date"] > date - 30]
    # compute distance to centroid of the author
    authorToCentroid <- as.data.frame(DistToCentroid(authorData, method = "manhattan"))
    # compute distance to centroid of the genre
    genreToCentroid <- as.data.frame(DistToCentroid(genreData, method = "manhattan"))
    # compute the mean of the centroid to the genre
    meanGenre <- mean(as.numeric(genreToCentroid$DistToCentroid))
    # get the distance of the play to the author
    playDistAuthor <- authorToCentroid[x, ]
    # get the distance of the play to the genre
    playDistGenre <- genreToCentroid[x, ]
    # diff author genre (rounded)
    distance <- round(euclidean(playDistGenre, playDistAuthor), digits = 3)
    # distance<-round(playDistGenre-playDistAuthor, digits = 2)
    # save the result unless the distance to the author could not be looked up
    if (!is.na(playDistAuthor)) {
      # NOTE(review): author and genre are factors when the metadata is read
      # with stringsAsFactors, so c() presumably stores their integer level
      # codes; the plotting code compares author to numeric codes. Confirm.
      results <- c(x, author, genre, date, distance, playDistAuthor, playDistGenre, meanGenre)
      df_comedy <- rbind(df_comedy, results)
    }
  }
}
# give a name to the columns
x <- c("play", "author", "genre", "date", "distance", "distance2author", "distance2genre", "meanGenre")
labelPoints <- rownames(df_comedy)
colnames(df_comedy) <- x

# Authors singled out in the figures, identified by the numeric code compared
# with df_comedy$author; each name is the suffix of the file that is written.
highlighted <- c(marivaux = 240, voltaire = 213, boissy = 68, moliere = 262)

# Date/distance plot in which only the plays flagged "blue" in data$color are
# visible; the other plays are drawn in a transparent colour but still feed
# the loess trend.
# data:     df_comedy with its `color` column filled in
# y_column: name of the column of `data` plotted on the y axis
# y_label:  label of the y axis
highlightPlot <- function(data, y_column, y_label) {
  data$y_value <- as.numeric(data[[y_column]])
  ggplot(data = data, mapping = aes(x = as.numeric(date), y = y_value)) +
    geom_point(stat = "summary", fun = "mean", aes(colour = color), show.legend = FALSE) +
    # `linewidth` replaces `size`, deprecated for lines since ggplot2 3.4.0
    geom_smooth(method = loess, linewidth = 1) +
    xlab("Date") + ylab(y_label) +
    scale_color_manual(values = c("blue" = "blue", "white" = "transparent")) +
    theme_bw()
}

# Distance to the author: one figure per highlighted author
for (name in names(highlighted)) {
  df_comedy$color <- ifelse(as.numeric(df_comedy$author) == highlighted[[name]], "blue", "white")
  vizEvoCentroid <- highlightPlot(df_comedy, "distance2author", "Distance to the author")
  ggsave(paste0("R/images/distance2author_comedy_", name, ".png"), plot = vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
}

# Distance to the genre, one colour per author, author code printed next to
# each play. The label is mapped in geom_text() only: the summary and
# smoothing layers cannot use it and warned that it was dropped.
vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre))) +
  geom_point(stat = "summary", fun = "mean", aes(colour = author)) +
  geom_smooth(method = loess, linewidth = 1) +
  xlab("Date") + ylab("Distance to the genre") +
  theme_bw() +
  geom_text(aes(label = author), hjust = 0, vjust = 0, size = 3)
ggsave("R/images/distance2genre_comedy_labels.png", plot = vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)

# Same figure without labels and without legend
vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre))) +
  geom_point(stat = "summary", fun = "mean", aes(colour = author), show.legend = FALSE) +
  geom_smooth(method = loess, linewidth = 1) +
  xlab("Date") + ylab("Distance to the genre") +
  theme_bw()
ggsave("R/images/distance2genre_comedy.png", plot = vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)

# Distance to the genre: one figure per highlighted author
for (name in names(highlighted)) {
  df_comedy$color <- ifelse(as.numeric(df_comedy$author) == highlighted[[name]], "blue", "white")
  vizEvoCentroid <- highlightPlot(df_comedy, "distance2genre", "Distance to the genre")
  ggsave(paste0("R/images/distance2genre_comedy_", name, ".png"), plot = vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
}

# Gap between the distance to the genre and the distance to the author
vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance))) +
  geom_point(stat = "summary", fun = "mean", aes(colour = author), show.legend = FALSE) +
  geom_smooth(method = loess, linewidth = 1) +
  xlab("Date") + ylab("Distance") +
  theme_bw()
ggsave("R/images/spread_comedy.png", plot = vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
Controlling variance
# Rolling variance (window of 30 rows) of df_comedy$distance, in the order in
# which the plays are stored, with a loess trend drawn on top.
# with 4 plays
#x <- 1:71
x <- seq_len(nrow(df_comedy))
y <- roll_var(as.numeric(df_comedy$distance), width = 30, min_obs = 1)
lo <- loess(y ~ x)
# Predict at every index: the curve then stays aligned with the points even
# if loess() had to drop missing variances (indices it cannot predict are NA
# and simply not drawn).
trend <- predict(lo, newdata = data.frame(x = x))
# Draw the figure on whichever graphics device is currently active
drawVariance <- function() {
  plot(x, y, xlab = "index", ylab = "variance")
  lines(x, trend, col = "red", lwd = 2)
}
drawVariance()
png("./R/images/variance2.png", width = 850, height = 600)
drawVariance()
dev.off()
## quartz_off_screen
## 2
We select all the authors with at least 3 plays in the dataset
# all authors, with the number of plays each of them has in the metadata
n_occur <- data.frame(table(metadata$Author))
# Remove the entry for plays without an author (empty value), selected by its
# value so that no real author is lost if that entry is absent or not first
n_occur <- n_occur[n_occur$Var1 != "", ]
# keep the authors with more than 2 plays
multiples <- n_occur[n_occur$Freq > 2, ]
# transform df into vector
multiples <- multiples$Var1
# number of authors
length(multiples)
## [1] 113
We select all the authors who have at least 3 tragedies
# Among the authors kept above, retain those with more than two tragedies
authorsSelected <- c()
for (candidate in multiples) {
  tragediesByAuthor <- metadata[metadata$Author == candidate & metadata$Genre == "tragedy", ]
  if (nrow(tragediesByAuthor) > 2) {
    authorsSelected <- c(authorsSelected, candidate)
  }
}
We compute the distance to the centroid of the author, the centroid of the genre and the distance between both:
# For every tragedy dated strictly between 1500 and 1800 whose author was
# selected above, record the distance of the play to the centroid of its
# author's tragedies, its distance to the centroid of the tragedies dated
# within 30 years of it, and the gap between the two.
# create an empty df to store results (8 values are stored per play)
df_tragedy <- data.frame(matrix(ncol = 8, nrow = 0))
# get name of plays
plays <- rownames(metadata)
# prepare to iterate
incr <- 0
# loop over plays
for (x in plays) {
  # increment
  incr <- incr + 1
  # author, genre and date are read from metadata columns 2, 4 and 3
  author <- metadata[incr, 2]
  genre <- metadata[incr, 4]
  date <- metadata[incr, 3]
  # if author has written multiple texts present in the corpus and genre is known
  # (&& is the scalar operator intended for if() conditions)
  if (author %in% authorsSelected && genre == "tragedy" && date > 1500 && date < 1800) {
    # get the data of all the plays of an author
    authorData <- d[, metadata[, "Author"] == author & metadata[, "Genre"] == genre]
    # authorData = d[, metadata[, "Author"] == author]
    # get all the plays of the same genre dated within 30 years of the play
    # genreData = d[, metadata[, "Genre"] == genre & metadata[, "Date"]]
    genreData <- d[, metadata[, "Genre"] == genre & metadata[, "Date"] < date + 30 & metadata[, "Date"] > date - 30]
    # compute distance to centroid of the author
    authorToCentroid <- as.data.frame(DistToCentroid(authorData, method = "manhattan"))
    # compute distance to centroid of the genre
    genreToCentroid <- as.data.frame(DistToCentroid(genreData, method = "manhattan"))
    # compute the mean of the centroid to the genre
    meanGenre <- mean(as.numeric(genreToCentroid$DistToCentroid))
    # get the distance of the play to the author
    playDistAuthor <- authorToCentroid[x, ]
    # get the distance of the play to the genre
    playDistGenre <- genreToCentroid[x, ]
    # diff author genre (rounded)
    distance <- round(euclidean(playDistGenre, playDistAuthor), digits = 3)
    # distance<-round(playDistGenre-playDistAuthor, digits = 2)
    # save the result unless the distance to the author could not be looked up
    if (!is.na(playDistAuthor)) {
      # NOTE(review): author and genre are factors when the metadata is read
      # with stringsAsFactors, so c() presumably stores their integer level
      # codes; the plotting code compares author to numeric codes. Confirm.
      results <- c(x, author, genre, date, distance, playDistAuthor, playDistGenre, meanGenre)
      df_tragedy <- rbind(df_tragedy, results)
    }
  }
}
#give a name to the eight columns of df_tragedy
x <- c("play", "author", "genre", "date", "distance", "distance2author", "distance2genre", "meanGenre")
labelPoints <- rownames(df_tragedy)
colnames(df_tragedy) <- x
# Highlight a single author: `color` is "blue" for the rows whose author value
# equals 213 and "white" (drawn transparent by the manual scale) otherwise.
# NOTE(review): 213 is a hard-coded numeric author code, presumably the author
# named in the output file (Voltaire) -- confirm it still matches when the
# metadata changes.
df_tragedy$color <- df_tragedy$author
df_tragedy$color <- as.numeric(df_tragedy$color)
df_tragedy$color[df_tragedy$color == 213] <- "blue"
df_tragedy$color[df_tragedy$color != "blue"] <- "white"
truc <- df_tragedy$color
#vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2author), label=rownames(df_tragedy)))+
# mean distance to the author per date, with a loess trend over all plays
# (linewidth replaces the deprecated size argument for lines)
vizEvoCentroid <- ggplot(
  data = df_tragedy,
  mapping = aes(x = as.numeric(date), y = as.numeric(distance2author))
) +
  geom_point(stat = "summary", fun = "mean", aes(colour = color), show.legend = FALSE) +
  geom_smooth(method = loess, linewidth = 1) +
  xlab("Date") + ylab("Distance to the author") +
  scale_color_manual(values = c("blue" = "blue", "white" = "transparent")) +
  theme_bw() #+geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2author_tragedy_voltaire.png", plot = vizEvoCentroid,
       width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# Highlight a single author: `color` is "blue" for the rows whose author value
# equals 181 and "white" (drawn transparent by the manual scale) otherwise.
# NOTE(review): 181 is a hard-coded numeric author code, presumably the author
# named in the output file (Crebillon) -- confirm it still matches when the
# metadata changes.
df_tragedy$color <- df_tragedy$author
df_tragedy$color <- as.numeric(df_tragedy$color)
df_tragedy$color[df_tragedy$color == 181] <- "blue"
df_tragedy$color[df_tragedy$color != "blue"] <- "white"
truc <- df_tragedy$color
#vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2author), label=rownames(df_tragedy)))+
# mean distance to the author per date, with a loess trend over all plays
# (linewidth replaces the deprecated size argument for lines)
vizEvoCentroid <- ggplot(
  data = df_tragedy,
  mapping = aes(x = as.numeric(date), y = as.numeric(distance2author))
) +
  geom_point(stat = "summary", fun = "mean", aes(colour = color), show.legend = FALSE) +
  geom_smooth(method = loess, linewidth = 1) +
  xlab("Date") + ylab("Distance to the author") +
  scale_color_manual(values = c("blue" = "blue", "white" = "transparent")) +
  theme_bw() #+geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2author_tragedy_crebillon.png", plot = vizEvoCentroid,
       width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# Highlight a single author: `color` is "blue" for the rows whose author value
# equals 264 and "white" (drawn transparent by the manual scale) otherwise.
# NOTE(review): 264 is a hard-coded numeric author code, presumably the author
# named in the output file (Racine) -- confirm it still matches when the
# metadata changes.
df_tragedy$color <- df_tragedy$author
df_tragedy$color <- as.numeric(df_tragedy$color)
df_tragedy$color[df_tragedy$color == 264] <- "blue"
df_tragedy$color[df_tragedy$color != "blue"] <- "white"
truc <- df_tragedy$color
#vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2author), label=rownames(df_tragedy)))+
# mean distance to the author per date, with a loess trend over all plays
# (linewidth replaces the deprecated size argument for lines)
vizEvoCentroid <- ggplot(
  data = df_tragedy,
  mapping = aes(x = as.numeric(date), y = as.numeric(distance2author))
) +
  geom_point(stat = "summary", fun = "mean", aes(colour = color), show.legend = FALSE) +
  geom_smooth(method = loess, linewidth = 1) +
  xlab("Date") + ylab("Distance to the author") +
  scale_color_manual(values = c("blue" = "blue", "white" = "transparent")) +
  theme_bw() #+geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2author_tragedy_racine.png", plot = vizEvoCentroid,
       width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
#vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre), label=rownames(df_tragedy)))+
# Mean distance to the genre per date and author, loess trend over all plays,
# and one text label per play. The label aesthetic is mapped in geom_text()
# only: as a plot-wide aesthetic it is dropped by the summary and smoothing
# statistics, which raises a warning for each of them.
# (linewidth replaces the deprecated size argument for lines)
vizEvoCentroid <- ggplot(
  data = df_tragedy,
  mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre))
) +
  geom_point(stat = "summary", fun = "mean", aes(colour = author)) +
  geom_smooth(method = loess, linewidth = 1) +
  xlab("Date") + ylab("Distance to the genre") +
  theme_bw() +
  geom_text(aes(label = author), hjust = 0, vjust = 0, size = 3)
ggsave("R/images/distance2genre_tragedy_labels.png", plot = vizEvoCentroid,
       width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
# Mean distance to the genre per date and author (no legend), with a loess
# trend over all plays. No label aesthetic is mapped because no text layer is
# drawn; as a plot-wide aesthetic it was dropped by the statistics with a
# warning. (linewidth replaces the deprecated size argument for lines)
vizEvoCentroid <- ggplot(
  data = df_tragedy,
  mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre))
) +
  geom_point(stat = "summary", fun = "mean", aes(colour = author), show.legend = FALSE) +
  geom_smooth(method = loess, linewidth = 1) +
  xlab("Date") + ylab("Distance to the genre") +
  theme_bw() #+geom_text(aes(label = author), hjust=0, vjust=0, size=3)
ggsave("R/images/distance2genre_tragedy.png", plot = vizEvoCentroid,
       width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
# Highlight a single author: `color` is "blue" for the rows whose author value
# equals 213 and "white" (drawn transparent by the manual scale) otherwise.
# NOTE(review): 213 is a hard-coded numeric author code, presumably the author
# named in the output file (Voltaire) -- confirm it still matches when the
# metadata changes.
df_tragedy$color <- df_tragedy$author
df_tragedy$color <- as.numeric(df_tragedy$color)
df_tragedy$color[df_tragedy$color == 213] <- "blue"
df_tragedy$color[df_tragedy$color != "blue"] <- "white"
truc <- df_tragedy$color
#vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre), label=rownames(df_tragedy)))+
# mean distance to the genre per date, with a loess trend over all plays
# (linewidth replaces the deprecated size argument for lines)
vizEvoCentroid <- ggplot(
  data = df_tragedy,
  mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre))
) +
  geom_point(stat = "summary", fun = "mean", aes(colour = color), show.legend = FALSE) +
  geom_smooth(method = loess, linewidth = 1) +
  xlab("Date") + ylab("Distance to the genre") +
  scale_color_manual(values = c("blue" = "blue", "white" = "transparent")) +
  theme_bw() #+geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2genre_tragedy_voltaire.png", plot = vizEvoCentroid,
       width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# Highlight a single author: `color` is "blue" for the rows whose author value
# equals 181 and "white" (drawn transparent by the manual scale) otherwise.
# NOTE(review): 181 is a hard-coded numeric author code, presumably the author
# named in the output file (Crebillon) -- confirm it still matches when the
# metadata changes.
df_tragedy$color <- df_tragedy$author
df_tragedy$color <- as.numeric(df_tragedy$color)
df_tragedy$color[df_tragedy$color == 181] <- "blue"
df_tragedy$color[df_tragedy$color != "blue"] <- "white"
truc <- df_tragedy$color
#vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre), label=rownames(df_tragedy)))+
# mean distance to the genre per date, with a loess trend over all plays
# (linewidth replaces the deprecated size argument for lines)
vizEvoCentroid <- ggplot(
  data = df_tragedy,
  mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre))
) +
  geom_point(stat = "summary", fun = "mean", aes(colour = color), show.legend = FALSE) +
  geom_smooth(method = loess, linewidth = 1) +
  xlab("Date") + ylab("Distance to the genre") +
  scale_color_manual(values = c("blue" = "blue", "white" = "transparent")) +
  theme_bw() #+geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2genre_tragedy_crebillon.png", plot = vizEvoCentroid,
       width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# Highlight a single author: `color` is "blue" for the rows whose author value
# equals 264 and "white" (drawn transparent by the manual scale) otherwise.
# NOTE(review): 264 is a hard-coded numeric author code, presumably the author
# named in the output file (Racine) -- confirm it still matches when the
# metadata changes.
df_tragedy$color <- df_tragedy$author
df_tragedy$color <- as.numeric(df_tragedy$color)
df_tragedy$color[df_tragedy$color == 264] <- "blue"
df_tragedy$color[df_tragedy$color != "blue"] <- "white"
truc <- df_tragedy$color
#vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre), label=rownames(df_tragedy)))+
# mean distance to the genre per date, with a loess trend over all plays
# (linewidth replaces the deprecated size argument for lines)
vizEvoCentroid <- ggplot(
  data = df_tragedy,
  mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre))
) +
  geom_point(stat = "summary", fun = "mean", aes(colour = color), show.legend = FALSE) +
  geom_smooth(method = loess, linewidth = 1) +
  xlab("Date") + ylab("Distance to the genre") +
  scale_color_manual(values = c("blue" = "blue", "white" = "transparent")) +
  theme_bw() #+geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2genre_tragedy_racine.png", plot = vizEvoCentroid,
       width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
#vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance), label=rownames(df_tragedy)))+
# Mean of the `distance` column per date and author, with a loess trend over
# all plays. No label aesthetic is mapped because no text layer is drawn; as a
# plot-wide aesthetic it was dropped by the statistics with a warning.
# (linewidth replaces the deprecated size argument for lines)
vizEvoCentroid <- ggplot(
  data = df_tragedy,
  mapping = aes(x = as.numeric(date), y = as.numeric(distance))
) +
  geom_point(stat = "summary", fun = "mean", aes(colour = author)) +
  geom_smooth(method = loess, linewidth = 1) +
  xlab("Date") + ylab("Distance") +
  theme_bw() #+geom_text(aes(label = author), hjust=0, vjust=0, size=3)
ggsave("R/images/spread_tragedy.png", plot = vizEvoCentroid,
       width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
Controlling variance
# with 4 plays
# Rolling variance (width = 30, min_obs = 1) of the tragedy distances, with a
# loess curve drawn on top; shown on the active device, then saved as PNG.
#x <- 1:71
x <- seq_len(nrow(df_tragedy))
y <- roll_var(as.numeric(df_tragedy$distance), width = 30, min_obs = 1)
lo <- loess(y ~ x)
# loess() drops observations whose variance is NA, so the fitted values are
# drawn against the indices that were actually used (not against 1..k)
fitted_idx <- x[!is.na(y)]
plot(x, y, xlab = "index", ylab = "variance")
lines(fitted_idx, predict(lo), col = 'red', lwd = 2)
# same figure, written to disk
# NOTE(review): this is the same path as the one used for the comedy variance
# figure, which is therefore overwritten -- confirm the intended file name.
png("./R/images/variance2.png", width = 850, height = 600)
plot(x, y, xlab = "index", ylab = "variance")
lines(fitted_idx, predict(lo), col = 'red', lwd = 2)
dev.off() ## quartz_off_screen
## 2